In [1]:
import os
from datetime import datetime
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
os.chdir("..")

In [3]:
from Signals.BitcoinData import BitcoinData
from Signals.WikipediaData import WikipediaData
from Signals.FXData import FXData

Download data for relevant range and normalize.


In [4]:
bd = BitcoinData()
bitcoin = bd.get(datetime(2016, 1, 1), datetime(2016, 6, 1))

In [5]:
wd = WikipediaData()
wiki = wd.get(datetime(2016, 1, 1), datetime(2016, 6, 1))

In [6]:
fxd = FXData()
fx = fxd.get(datetime(2016, 1, 1), datetime(2016, 6, 1))

In [7]:
x = bitcoin.merge(fx, how='outer', left_index=True, right_index=True)\
           .merge(wiki, how='outer', left_index=True, right_index=True)

Fill forward missing data.


In [8]:
x['USDCNY'].fillna(method='pad', inplace=True)
x['USDEUR'].fillna(method='pad', inplace=True)
x['VIX'].fillna(method='pad', inplace=True)

Add transformations.


In [9]:
x['next_open'] = x['Open'].shift(-1)
x['change'] = x['next_open'] - x['Open']
x['next_day_higher'] = x['change'].map(lambda x: x > 0)

In [10]:
def addlogret(df, col):
    df[col+'_logret'] = np.log(df[col]) - np.log(df[col].shift(1))
def addewma(df, col):
    for i in [3, 10, 30]:
        df[col+'_ewma_'+str(i)] = df[col].ewm(halflife=i).mean()
def addewmvar(df, col):
    for i in [3, 10, 30]:
        df[col+'_ewmvar_'+str(i)] = df[col].ewm(halflife=i).var()

In [11]:
for col in ['Open', 'Volume', 'USDCNY', 'USDEUR', 'VIX', 'views']:
    addlogret(x, col)
    addewma(x, col)
    addewma(x, col+"_logret")
    addewmvar(x, col)

In [12]:
x.dropna(inplace=True)

Build logistic regression model.


In [13]:
y = x['next_day_higher']

In [14]:
x.drop(['next_day_higher'], axis=1, inplace=True)

In [15]:
from sklearn import linear_model

In [16]:
logreg = linear_model.LogisticRegression(C=1e5)
logreg.fit(x, y)


Out[16]:
LogisticRegression(C=100000.0, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)

In [17]:
print("Model score: {:.1f}%".format(100*logreg.score(x, y)))


Model score: 56.5%

Test predictions.


In [18]:
from PerfAnalysis.PnL import PnL

In [19]:
pnl = PnL()
pnl.calc_pnl(x, y, price_col="Open")


Out[19]:
{'APR': 212.82985561414253,
 'cash': 0,
 'position': 0.51604581259045823,
 'value': 269.0353239359095}

213% (annualized) return over the training set, even including 25 bp commission, but how about the final six months of the year?


In [20]:
bitcoin = bd.get(datetime(2016, 6, 1), datetime(2017, 1, 1))
wiki = wd.get(datetime(2016, 6, 1), datetime(2017, 1, 1))
fx = fxd.get(datetime(2016, 6, 1), datetime(2017, 1, 1))
x = bitcoin.merge(fx, how='outer', left_index=True, right_index=True)\
           .merge(wiki, how='outer', left_index=True, right_index=True)
x['USDCNY'].fillna(method='pad', inplace=True)
x['USDEUR'].fillna(method='pad', inplace=True)
x['VIX'].fillna(method='pad', inplace=True)
x['next_open'] = x['Open'].shift(-1)
x['change'] = x['next_open'] - x['Open']
x['next_day_higher'] = x['change'].map(lambda x: x > 0)
for col in ['Open', 'Volume', 'USDCNY', 'USDEUR', 'VIX', 'views']:
    addlogret(x, col)
    addewma(x, col)
    addewma(x, col+"_logret")
    addewmvar(x, col)
x.drop(['next_day_higher'], axis=1, inplace=True)
x.dropna(inplace=True)

In [21]:
pred = logreg.predict(x)

In [22]:
pnl.calc_pnl(x, pred, price_col="Open")


Out[22]:
{'APR': -111.22094549159256,
 'cash': 74.649234306898478,
 'position': 0,
 'value': 74.649234306898478}

A ha, much less successful on the test dataset.